

********************************************************************************
* Create a dataset with basic firm characteristics  2017 
********************************************************************************

* Retrieve table "individ_2017" from SQL database
odbc load, exec("select * from Ftg_2017")dsn("P0846") clear

* Save raw data so I do not need to use odbc each time (which is slow)
save Data/ODBC_Ftg_2017, replace
use Data/ODBC_Ftg_2017, clear
 
* SNI (1992, 2002, 2007)
destring Org_Sni2007, replace
gen SNI_Section_tmp = trunc(Org_Sni2007/1000) 
tab SNI_Section_tmp, miss
replace SNI_Section_tmp = . if SNI_Section_tmp == 0
gen SNI_Section = ""
replace SNI_Section = "Agriculture, forestry and fishing" 										if 1 <= SNI_Section_tmp &  SNI_Section_tmp <= 3
replace SNI_Section = "Mining and quarrying" 													if 5 <= SNI_Section_tmp &  SNI_Section_tmp <= 9
replace SNI_Section = "Manufacturing" 															if 10 <= SNI_Section_tmp &  SNI_Section_tmp <= 33
replace SNI_Section = "Electricity,  gas, steam and air conditioning supply" 					if 35 <= SNI_Section_tmp &  SNI_Section_tmp <= 35
replace SNI_Section = "Water supply; sewerage, waste management and remediation activities" 	if 36 <= SNI_Section_tmp &  SNI_Section_tmp <= 39
replace SNI_Section = "Construction" 		    												if 41 <= SNI_Section_tmp &  SNI_Section_tmp <= 43 
replace SNI_Section = "Wholesale and retail trade; repair of motor vehicles and motorcycles" 	if 45 <= SNI_Section_tmp &  SNI_Section_tmp <= 47
replace SNI_Section = "Transportation and storage"												if 49 <= SNI_Section_tmp &  SNI_Section_tmp <= 53
replace SNI_Section = "Accommodation and food service activities" 								if 55 <= SNI_Section_tmp &  SNI_Section_tmp <= 56
replace SNI_Section = "Information and communication" 											if 58 <= SNI_Section_tmp &  SNI_Section_tmp <= 63
replace SNI_Section = "Financial and insurance activities" 										if 64 <= SNI_Section_tmp &  SNI_Section_tmp <= 66
replace SNI_Section = "Real estate activities" 													if 68 <= SNI_Section_tmp &  SNI_Section_tmp <= 68
replace SNI_Section = "Professional, scientific and technical activities" 						if 69 <= SNI_Section_tmp &  SNI_Section_tmp <= 75
replace SNI_Section = "Administrative and support service activities" 							if 77 <= SNI_Section_tmp &  SNI_Section_tmp <= 82
replace SNI_Section = "Public administration and defence; compulsory social security" 			if 84 <= SNI_Section_tmp &  SNI_Section_tmp <= 84
replace SNI_Section = "Education" 																if 85 <= SNI_Section_tmp &  SNI_Section_tmp <= 85
replace SNI_Section = "Human health and social work activities" 								if 86 <= SNI_Section_tmp &  SNI_Section_tmp <= 88
replace SNI_Section = "Arts, entertainment and recreation" 										if 90 <= SNI_Section_tmp &  SNI_Section_tmp <= 93
replace SNI_Section = "Other service activities" 												if 94 <= SNI_Section_tmp &  SNI_Section_tmp <= 96
replace SNI_Section = "Activities of households as employers" 									if 97 <= SNI_Section_tmp &  SNI_Section_tmp <= 98
replace SNI_Section = "Activities of extraterritorial organisations and bodies" 				if 99 <= SNI_Section_tmp &  SNI_Section_tmp <= 99
drop SNI_Section_tmp
tab SNI_Section, miss

drop  Org_SektorKod
destring Org_Typ, replace

sum Ftg_Antal_anstallda, det

* Adjust for missing values (only variables SNI_Section and Ftg_*)
* These variables are used as controls in robustness analaysis
replace SNI_Section = "Missing" if SNI_Section == "" 
foreach var of varlist Ftg_* {
	gen `var'_miss = missing(`var')
	sum `var' if `var' < .
	replace `var' = r(mean) if missing(`var')
	
}

compress
rename P0846_Lopnr_peorgNr P0846_lopnr_PeOrgNr

save Data/data_firm_X_2017, replace